rm(list=ls())

library(RColorBrewer)
library(RItools)
library(xtable)
library(reshape)
library(car)
library(ggplot2)
library(grid)
library(xtable)
library(jpeg)
library(extrafont)


lims = c(.1,.90)
#lims = c(.05,.95)


careful.user.check = F
demos = F
graphics = T
n.data = 5

##function for clustering standard errors
clx <-  function(fm, dfcw, cluster){
  library(sandwich)
  library(lmtest)
  M <- length(unique(cluster))
  N <- length(cluster)
  dfc <- (M/(M-1))*((N-1)/(N-fm$rank))
  u <- apply(estfun(fm),2,
             function(x) tapply(x, cluster, sum))
  vcovCL <- dfc*sandwich(fm, meat=crossprod(u)/N)*dfcw
  coeftest(fm, vcovCL) 
}



tests.matrix = matrix(nrow = 7,ncol = (n.data*2)+2)
row.names(tests.matrix) = c('Tall','Short','Difference (Tall-Short)','Difference-in-Differences','T-statistic','P-value (one tailed)','N')
colnames(tests.matrix) = c('I1','S1','I2','S2','I3','S3','I4','S4','I5','S5','IP','SP')

tests.matrix.trim = tests.matrix

main.tests.matrix = matrix(nrow = n.data+1, ncol = 11)

pooled.matrix = matrix(nrow = 0, ncol = 7)

covariate.matrix = matrix(nrow = 0, ncol = 5)

for(i in 1:n.data){
  if(i == 1){
    data = read.csv('../data/lines/lines_time_10_new.csv') #2011
  }   
  if(i == 2){
    data = read.csv('../data/lines/lines_time_11.csv') ##2011
  }
  if(i == 3){
    data = read.csv('../data/lines/lines_time_12.csv') ##2011
  }
  if(i == 4){
    data = read.csv('../data/lines/lines_10_replication_mod.csv') #2011
  }
  if(i == 5){
    data = read.csv('../data/lines/results_mod.csv') ##2012
  }
  
  data = data[unique(data$V6),]
  data = data[data$V10 == 1,]
  if(careful.user.check ==T){
    if(i %in% c(1,4)){
      data = data[data$Q100 %in% c('bandit','Bandit','BANDIT'),] 
    }
    if(i == 2){
      data = data[data$Q100 %in% c('cowboy','Cowboy','cowboy YEE-HAW!'),] 
    }
    if(i == 3){
      data = data[data$Q100 %in% c('politician','Politician','poliitician','Politician.'),] 
    }        
    if(i == 5){
      data = data[data$Q100 %in% c('cyclist','Cyclist','CYCLIST','Cyclist '),] 
    }        
    
  }
  
  
  data = data[data$V7 == 0,]  ##eiliminate previews
  
  print(i)
  print(nrow(data))
  
  
  data = data[,-grep('image',colnames(data))]
  data = data[,-grep('imag',colnames(data))]
  data = data[,-grep('ima',colnames(data))]
  data = data[,-grep('im',colnames(data))]
  
  segsT = data[,grep('seg_T',colnames(data))]
  segsTsum = apply(segsT,1,sum,na.rm=T)
  
  segsS = data[,grep('seg_s',colnames(data))]
  segsSsum = apply(segsS,1,sum,na.rm=T)
  
  
  intsT = data[,grep('int_T',colnames(data))]
  intsTsum = apply(intsT,1,sum,na.rm=T)
  
  intsS = data[,grep('int_s',colnames(data))]
  intsSsum = apply(intsS,1,sum,na.rm=T)
  
  outputs = as.data.frame(cbind(segsTsum,segsSsum,intsTsum,intsSsum))
  seg.diff = segsTsum - segsSsum
  int.diff = intsTsum - intsSsum
  
  outcome = t.test(x = (outputs$intsTsum - outputs$intsSsum) - (outputs$segsTsum - outputs$segsSsum))
  outputs$diff.diff = (outputs$intsTsum - outputs$intsSsum) - (outputs$segsTsum - outputs$segsSsum)
  fm = lm(diff.diff~1, data = outputs)
  
  
  
  tests.matrix[1,(i*2)-1] = mean(outputs$intsTsum); tests.matrix[1,i*2] = mean(outputs$segsTsum)
  tests.matrix[2,(i*2)-1] = mean(outputs$intsSsum); tests.matrix[2,i*2] = mean(outputs$segsSsum)
  tests.matrix[3,(i*2)-1] = mean(outputs$intsTsum) - mean(outputs$intsSsum); tests.matrix[3,i*2] = mean(outputs$segsTsum) - mean(outputs$segsSsum)
  tests.matrix[4,(i*2)-1] = outcome$estimate
  tests.matrix[5,(i*2)-1] = outcome$statistic; 
  tests.matrix[6,(i*2)-1] = outcome$p.value/2
  tests.matrix[7,(i*2)-1] = nrow(outputs)
  
  print(outcome)
  
  ##new print matrix
  main.tests.matrix[i,1] = mean(outputs$intsTsum - outputs$intsSsum)
  main.tests.matrix[i,2] = sd(outputs$intsTsum - outputs$intsSsum)
  main.tests.matrix[i,3] = mean(outputs$segsTsum - outputs$segsSsum)
  main.tests.matrix[i,4] = sd(outputs$segsTsum - outputs$segsSsum)
  main.tests.matrix[i,5] = summary(fm)$coefficients[1,'Estimate']
  main.tests.matrix[i,6] = summary(fm)$coefficients[1,'Std. Error']
  main.tests.matrix[i,7] = summary(fm)$coefficients[1,'t value']
  main.tests.matrix[i,8] = summary(fm)$coefficients[1,'Estimate'] - 1.96*summary(fm)$coefficients[1,'Std. Error'] 
  main.tests.matrix[i,9] = summary(fm)$coefficients[1,'Estimate'] + 1.96*summary(fm)$coefficients[1,'Std. Error'] 
  main.tests.matrix[i,10] = summary(fm)$coefficients[1,'Pr(>|t|)']/2
  main.tests.matrix[i,11] = nrow(outputs)
  
  
  ##trims
  segsTlims = quantile(segsTsum,lims); segsTtrim = segsTsum[segsTsum>=min(segsTlims)&segsTsum<=max(segsTlims)]
  segsSlims = quantile(segsSsum,lims); segsStrim = segsSsum[segsSsum>=min(segsSlims)&segsSsum<=max(segsSlims)]
  intsTlims = quantile(intsTsum,lims); intsTtrim = intsTsum[intsTsum>=min(intsTlims)&intsTsum<=max(intsTlims)]
  intsSlims = quantile(intsSsum,lims); intsStrim = intsSsum[intsSsum>=min(intsSlims)&intsSsum<=max(intsSlims)]
  
  segsDif = segsTsum-segsSsum
  segsTrim = quantile(segsDif,lims); segsTrim = (segsDif)[segsDif>=min(segsTrim)&segsDif<=max(segsTrim)]
  
  intsDif = intsTsum-intsSsum
  intsTrim = quantile(intsDif,lims); intsTrim = (intsDif)[intsDif>=min(intsTrim)&intsDif<=max(intsTrim)]
  
  DifDif = intsDif - segsDif
  allTrim = quantile(DifDif,lims); allTrim = (DifDif)[DifDif>=min(allTrim)&DifDif<=max(allTrim)]
  
  

  test = rep(i,length(segsDif))
  talls = cbind(segsTsum,intsTsum)
  shorts = cbind(segsSsum,intsSsum)
  talls.shorts = cbind(talls,shorts)
  diffs = cbind(segsDif,intsDif)
  results = cbind(talls.shorts,diffs) 
  results = cbind(results,test)
  
  
  pooled.matrix = rbind(pooled.matrix,results)
  
  covariate.dat = data[c('Q96','Q97')]
  ##recode age into years
  covariate.dat$Q96 = as.numeric(as.character(covariate.dat$Q96))
  if(i %in% c(1,2,3)){covariate.dat$Q96 = 2011-covariate.dat$Q96}else{covariate.dat$Q96 = 2012-covariate.dat$Q96}
  

  covariate.matrix = rbind(covariate.matrix,covariate.dat)
  
}
colnames(covariate.matrix) = c('age','gender')
pooled.matrix = as.data.frame(pooled.matrix)

dat = cbind(pooled.matrix,covariate.matrix)
dat$diff = dat$intsDif - dat$segsDif

write.csv(dat,'TableA21_data.csv',row.names = F)